Combining Simulation Results

library(tidyverse)
## Warning: package 'dplyr' was built under R version 4.0.5
#read in all files
setwd("~/Desktop/Cluster/OOSEst/26Oct2023")
files <- list.files(pattern=".Rdata")

all_res <- target_res <- c()
for (i in 1:length(files)) {
  load(file = files[i])
  
  #all metrics
  all_res_ind <- results$all_res
  all_res <- bind_rows(all_res, all_res_ind)
  
  #target metrics
  target_res_ind <- results$target_res %>%
    mutate(K = unique(all_res_ind$K),
           n_mean = unique(all_res_ind$n_mean),
           n_sd = unique(all_res_ind$n_sd),
           covars_fix = unique(all_res_ind$covars_fix),
           covars_rand = unique(all_res_ind$covars_rand),
           lin = unique(all_res_ind$lin),
           eps_study_m = unique(all_res_ind$eps_study_m),
           eps_study_tau = unique(all_res_ind$eps_study_tau),
           eps_study_inter = unique(all_res_ind$eps_study_inter),
           distribution = unique(all_res_ind$distribution))
  target_res <- bind_rows(target_res, target_res_ind)
  
  print(i)
}

#checking output
seeds <- strsplit(files,"_") %>% map_chr(.f=function(x) x[2])
length(unique(seeds)) == length(files) #making sure every iteration has a unique seed

nrow(all_res) #to make sure every iteration ran
nrow(target_res)/800
setwd("~/Dropbox/Moderation/Out of Sample Prediction/Data")
saveRDS(all_res, "all_res_26Oct2023.RDS")
saveRDS(target_res, "target_res_26Oct2023.RDS")
all_res <- readRDS("Data/all_res_26Oct2023.RDS")
target_res <- readRDS("Data/target_res_26Oct2023.RDS")

#summarise across iterations
target_ind <- target_res %>%
  group_by(id, W, sex, smstat, age, age2, weight, madrs, Y, tau, method,
           K, n_mean, n_sd, covars_fix, covars_rand, lin, eps_study_m,
           eps_study_tau, eps_study_inter, distribution) %>%
  summarise(n_iter = n(),
            coverage = mean(coverage),
            bias = mean(bias),
            abs_bias = mean(abs_bias),
            length = mean(length),
            significant = mean(significant)) %>%
  mutate(scenario = paste(covars_fix, covars_rand, lin, eps_study_m,
                          eps_study_tau, eps_study_inter, sep = "_"),
         sds = paste(eps_study_m, eps_study_tau, eps_study_inter, sep = "_"),
         Linear = ifelse(lin == T, "Linear", "Non-Linear"),
         sds_lab = factor(case_when(sds == "0.05_0.05_0.05" ~ "Low Heterogeneity",
                             sds == "1_0.05_0.05" ~ "Heterogeneous Intercept",
                             sds == "1_0.5_0.05" ~ "Heterogeneous Main",
                             sds == "1_1_0.5" ~ "High Heterogeneity"), 
                          levels = c("Low Heterogeneity", "Heterogeneous Intercept",
                                     "Heterogeneous Main", "High Heterogeneity")))

#average across target sample
target_sum <- target_ind %>%
  group_by(method,
           K, n_mean, n_sd, covars_fix, covars_rand, lin, eps_study_m,
           eps_study_tau, eps_study_inter, distribution) %>%
  summarise(across(coverage:significant, list(mean = mean, sd = sd))) %>%
  mutate(scenario = paste(covars_fix, covars_rand, lin, eps_study_m,
                          eps_study_tau, eps_study_inter, sep = "_"),
         sds = paste(eps_study_m, eps_study_tau, eps_study_inter, sep = "_"),
         Linear = ifelse(lin == T, "Linear", "Non-Linear"),
         sds_lab = factor(case_when(sds == "0.05_0.05_0.05" ~ "Low Heterogeneity",
                             sds == "1_0.05_0.05" ~ "Heterogeneous Intercept",
                             sds == "1_0.5_0.05" ~ "Heterogeneous Main",
                             sds == "1_1_0.5" ~ "High Heterogeneity"), 
                          levels = c("Low Heterogeneity", "Heterogeneous Intercept",
                                     "Heterogeneous Main", "High Heterogeneity")))

#long form for boxplot
all_long <- all_res %>%
  rename(MA = manual_res, MA_Inc = manual_res_wrong,
         HCF = cf_res, ACF = cf_a_res,
         HCF_Rand = cf_res_rand, ACF_Rand = cf_a_res_rand,
         SBART = sb_res, SBART_Rand = sb_res_rand) %>%
  pivot_longer(cols = MA:SBART_Rand,
               names_to = "Method",
               values_to = "Value") %>%
  mutate(scenario = paste(covars_fix, covars_rand, lin, eps_study_m,
                          eps_study_tau, eps_study_inter, sep = "_"),
         sds = paste(eps_study_m, eps_study_tau, eps_study_inter, sep = "_"),
         Dataset = factor(ifelse(grepl("target", Metric) == T,
                                 "Target", "Training"),
                          levels = c("Training", "Target")),
         Linear = ifelse(lin == T, "Linear", "Non-Linear"))

#average across iterations
all_sum <- all_res %>%
  rename(MA = manual_res, MA_Inc = manual_res_wrong,
         HCF = cf_res, ACF = cf_a_res,
         HCF_Rand = cf_res_rand, ACF_Rand = cf_a_res_rand,
         SBART = sb_res, SBART_Rand = sb_res_rand) %>%
  group_by(Metric, K, n_mean, n_sd, covars_fix, covars_rand,
           lin, eps_study_m, eps_study_tau, eps_study_inter,
           distribution) %>%
  summarise(across(MA:SBART_Rand, list(mean = mean, sd = sd)),
            n_iter = n())
summary(all_sum$n_iter) #check iterations
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     500     500     500     500     500     500
#average in long form
all_avg_long <- all_res %>%
  rename(MA = manual_res, MA_Inc = manual_res_wrong,
         HCF = cf_res, ACF = cf_a_res,
         HCF_Rand = cf_res_rand, ACF_Rand = cf_a_res_rand,
         SBART = sb_res, SBART_Rand = sb_res_rand) %>%
  group_by(Metric, K, n_mean, n_sd, covars_fix, covars_rand,
           lin, eps_study_m, eps_study_tau, eps_study_inter,
           distribution) %>%
  summarise(across(MA:SBART_Rand, mean)) %>%
  pivot_longer(cols = MA:SBART_Rand,
               names_to = "Method",
               values_to = "Value") %>%
  mutate(scenario = paste(covars_fix, covars_rand, lin, eps_study_m,
                          eps_study_tau, eps_study_inter, sep = "_"),
         sds = paste(eps_study_m, eps_study_tau, eps_study_inter, sep = "_"))

TARGET EVAL: Age Moderator, Same Training Distribution

Coverage

#boxplot across 100 people in target sample
target_ind %>%
  filter(covars_fix == "age",
         distribution == "same",
         method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
  ggplot(aes(x=method)) +
  geom_boxplot(aes(y=coverage)) +
  geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
  facet_grid(Linear ~ sds_lab, scales = "free") +
  scale_y_continuous(labels = scales::percent) +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=15)) +
  ylab("Coverage in Target Sample") +
  xlab("")

#ggsave("Results/26Oct2023/coverage.jpeg")
#for job talk
target_ind %>%
  filter(covars_fix == "age",
         distribution == "same",
         method %in% c("HCF", "MA", "SBART")) %>%
  mutate(method = factor(method, levels = c("MA", "HCF", "SBART"))) %>%
  ggplot(aes(x=method)) +
  geom_boxplot(aes(y=coverage)) +
  geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
  facet_grid(Linear ~ sds_lab, scales = "free") +
  scale_y_continuous(labels = scales::percent) +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  ylab("Coverage in Target Sample") +
  xlab("")
#average across all iterations
target_sum %>%
  filter(covars_fix == "age",
         distribution == "same",
         method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
  ggplot(aes(x=method)) +
  geom_point(aes(y=coverage_mean)) +
  # geom_errorbar(aes(ymin = coverage_mean - 1.96*coverage_sd,
  #                   ymax = min(coverage_mean + 1.96*coverage_sd, 1),
  #                   color=method)) +
  geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
  facet_grid(Linear ~ sds) +
  scale_y_continuous(labels = scales::percent) +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  ylab("Coverage in Target Sample")

#coverage by age - all
target_ind %>%
  filter(covars_fix == "age",
         distribution == "same",
         method %in% c("HCF", "MA", "SBART")) %>%
  ggplot(aes(x=age, y=coverage, color=method)) +
  geom_smooth(se=F) +
  geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(Linear ~ sds_lab, scales = "free")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

#points
target_ind %>%
  filter(covars_fix == "age",
         distribution == "same",
         method %in% c("HCF", "MA", "SBART")) %>%
  ggplot(aes(x=age, y=coverage, color=method)) +
  geom_point() +
  geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(Linear ~ sds_lab, scales = "free")

#coverage by madrs - all
target_ind %>%
  filter(covars_fix == "age",
         distribution == "same",
         method %in% c("HCF", "MA", "SBART")) %>%
  ggplot(aes(x=madrs, y=coverage, color=method)) +
  geom_smooth(se=F) +
  geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(Linear ~ sds_lab, scales = "free")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

#points
target_ind %>%
  filter(covars_fix == "age",
         distribution == "same",
         method %in% c("HCF", "MA", "SBART")) %>%
  ggplot(aes(x=madrs, y=coverage, color=method)) +
  geom_point() +
  geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(Linear ~ sds_lab, scales = "free")

#coverage by age - HCF
target_ind %>%
  filter(covars_fix == "age",
         distribution == "same",
         method == "HCF") %>%
  ggplot(aes(x=age, y=coverage)) +
  geom_point() +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(Linear ~ sds_lab, scales = "free") +
  ggtitle("Results Using HCF")
#coverage by madrs - HCF
target_ind %>%
  filter(covars_fix == "age",
         distribution == "same",
         method == "HCF") %>%
  ggplot(aes(x=madrs, y=coverage)) +
  geom_point() +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(Linear ~ sds_lab, scales = "free") +
  ggtitle("Results Using HCF")
#coverage by age - SBART
target_ind %>%
  filter(covars_fix == "age",
         distribution == "same",
         method == "SBART") %>%
  ggplot(aes(x=age, y=coverage)) +
  geom_point() +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(Linear ~ sds_lab, scales = "free") +
  ggtitle("Results Using SBART")

Length

#boxplot across 100 people in target sample
target_ind %>%
  filter(covars_fix == "age",
         distribution == "same",
         method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
  ggplot(aes(x=method)) +
  geom_boxplot(aes(y=length)) +
  facet_grid(Linear ~ sds_lab, scales = "free") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  ylab("Interval Length in Target Sample") +
  xlab("")
#ggsave("Results/26Oct2023/length.jpeg")
#average across all iterations
target_sum %>%
  filter(covars_fix == "age",
         distribution == "same",
         method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
  ggplot(aes(x=method)) +
  geom_point(aes(y=length_mean)) +
  # geom_errorbar(aes(ymin = coverage_mean - 1.96*coverage_sd,
  #                   ymax = min(coverage_mean + 1.96*coverage_sd, 1),
  #                   color=method)) +
  facet_grid(Linear ~ sds) +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  ylab("CI Length in Target Sample")

#length by age - all
target_ind %>%
  filter(covars_fix == "age",
         distribution == "same",
         method %in% c("HCF", "MA", "SBART")) %>%
  ggplot(aes(x=age, y=length, color=method)) +
  geom_smooth(se=F) +
  facet_grid(Linear ~ sds_lab, scales = "free")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

#points
target_ind %>%
  filter(covars_fix == "age",
         distribution == "same",
         method %in% c("HCF", "MA", "SBART")) %>%
  ggplot(aes(x=age, y=length, color=method)) +
  geom_point() +
  facet_grid(Linear ~ sds_lab, scales = "free")

Bias

target_ind %>%
  filter(covars_fix == "age",
         distribution == "same",
         method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
  mutate(sds_lab = factor(case_when(sds == "0.05_0.05_0.05" ~ "Low Heterogeneity",
                             sds == "1_0.05_0.05" ~ "Heterogeneous Intercept",
                             sds == "1_0.5_0.05" ~ "Heterogeneous Main",
                             sds == "1_1_0.5" ~ "High Heterogeneity"), 
                          levels = c("Low Heterogeneity", "Heterogeneous Intercept",
                                     "Heterogeneous Main", "High Heterogeneity"))) %>%
  ggplot(aes(x=method)) +
  geom_boxplot(aes(y=abs_bias)) +
  facet_grid(Linear ~ sds_lab, scales = "free") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  ylab("Absolute Bias in Target Sample") +
  xlab("")

ggsave("Results/26Oct2023/abs_bias.jpeg")
## Saving 10 x 7 in image
#for job talk
target_ind %>%
  filter(covars_fix == "age",
         distribution == "same",
         method %in% c("HCF", "MA", "SBART")) %>%
  mutate(method = factor(method, levels = c("MA", "HCF", "SBART")),
         sds_lab = factor(case_when(sds == "0.05_0.05_0.05" ~ "Low Heterogeneity",
                             sds == "1_0.05_0.05" ~ "Heterogeneous Intercept",
                             sds == "1_0.5_0.05" ~ "Heterogeneous Main",
                             sds == "1_1_0.5" ~ "High Heterogeneity"), levels = c("Low Heterogeneity", "Heterogeneous Intercept", "Heterogeneous Main", "High Heterogeneity"))) %>%
  ggplot(aes(x=method)) +
  geom_boxplot(aes(y=abs_bias)) +
  facet_grid(Linear ~ sds_lab, scales = "free") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=15)) +
  ylab("Absolute Bias in Target Sample") +
  xlab("")
#bias by age - all
target_ind %>%
  filter(covars_fix == "age",
         distribution == "same",
         method %in% c("HCF", "MA", "SBART")) %>%
  ggplot(aes(x=age, y=bias, color=method)) +
  geom_smooth(se=F) +
  facet_grid(Linear ~ sds_lab, scales = "free")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

#points
target_ind %>%
  filter(covars_fix == "age",
         distribution == "same",
         method %in% c("HCF", "MA", "SBART")) %>%
  ggplot(aes(x=age, y=bias, color=method)) +
  geom_point() +
  facet_grid(Linear ~ sds_lab, scales = "free")

TARGET EVAL: Other Settings

Age Moderator, Varying MADRS in Training Distributions

#boxplot across 100 people in target sample
target_ind %>%
  filter(covars_fix == "age",
         distribution == "varying_madrs",
         method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
  ggplot(aes(x=method, group=method)) +
  geom_boxplot(aes(y=coverage, color=method)) +
  geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
  facet_grid(Linear ~ sds, scales = "free") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  ylab("Coverage in Target Sample")

#average across all iterations
target_sum %>%
  filter(covars_fix == "age",
         distribution == "varying_madrs",
         method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
  ggplot(aes(x=method)) +
  geom_point(aes(y=coverage_mean)) +
  # geom_errorbar(aes(ymin = coverage_mean - 1.96*coverage_sd,
  #                   ymax = min(coverage_mean + 1.96*coverage_sd, 1),
  #                   color=method)) +
  geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
  facet_grid(Linear ~ sds) +
  scale_y_continuous(labels = scales::percent) +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  ylab("Coverage in Target Sample")

#coverage by age - HCF
target_ind %>%
  filter(covars_fix == "age",
         distribution == "varying_madrs",
         method == "HCF") %>%
  ggplot(aes(x=age, y=coverage)) +
  geom_point() +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(Linear ~ sds_lab, scales = "free") +
  ggtitle("Results Using HCF")

#covarage by madrs - HCF
target_ind %>%
  filter(covars_fix == "age",
         distribution == "varying_madrs",
         method == "HCF") %>%
  ggplot(aes(x=madrs, y=coverage)) +
  geom_point() +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(Linear ~ sds_lab, scales = "free") +
  ggtitle("Results Using HCF")

Age Moderator, Varying Age in Training Distributions

#boxplot across 100 people in target sample
target_ind %>%
  filter(covars_fix == "age",
         distribution == "separate_age",
         method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
  ggplot(aes(x=method, group=method)) +
  geom_boxplot(aes(y=coverage, color=method)) +
  geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
  facet_grid(Linear ~ sds, scales = "free") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  ylab("Coverage in Target Sample")

Age and MADRS Moderator, Same Training Distributions

#boxplot across 100 people in target sample
target_ind %>%
  filter(covars_fix == "age, madrs",
         distribution == "same",
         method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
  ggplot(aes(x=method, group=method)) +
  geom_boxplot(aes(y=coverage, color=method)) +
  geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
  facet_grid(Linear ~ sds, scales = "free") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  ylab("Coverage in Target Sample")

#average across all iterations
target_sum %>%
  filter(covars_fix == "age, madrs",
         distribution == "same",
         method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
  ggplot(aes(x=method)) +
  geom_point(aes(y=coverage_mean)) +
  # geom_errorbar(aes(ymin = coverage_mean - 1.96*coverage_sd,
  #                   ymax = min(coverage_mean + 1.96*coverage_sd, 1),
  #                   color=method)) +
  geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
  facet_grid(Linear ~ sds) +
  scale_y_continuous(labels = scales::percent) +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  ylab("Coverage in Target Sample")

#coverage by age - HCF
target_ind %>%
  filter(covars_fix == "age, madrs",
         distribution == "same",
         method == "HCF") %>%
  ggplot(aes(x=age, y=coverage)) +
  geom_point() +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(Linear ~ sds, scales = "free") +
  ggtitle("Results Using HCF")

#coverage by madrs - HCF
target_ind %>%
  filter(covars_fix == "age, madrs",
         distribution == "same",
         method == "HCF") %>%
  ggplot(aes(x=madrs, y=coverage)) +
  geom_point() +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(Linear ~ sds, scales = "free") +
  ggtitle("Results Using HCF")

Age and MADRS Moderator, Varying MADRS in Training Distributions

#boxplot across 100 people in target sample
target_ind %>%
  filter(covars_fix == "age, madrs",
         distribution == "varying_madrs",
         method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
  ggplot(aes(x=method, group=method)) +
  geom_boxplot(aes(y=coverage, color=method)) +
  geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
  facet_grid(Linear ~ sds, scales = "free") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  ylab("Coverage in Target Sample")

Age and MADRS Moderator, Separate Age in Training Distributions

#boxplot across 100 people in target sample
target_ind %>%
  filter(covars_fix == "age, madrs",
         distribution == "separate_age",
         method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
  ggplot(aes(x=method, group=method)) +
  geom_boxplot(aes(y=coverage, color=method)) +
  geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
  facet_grid(Linear ~ sds, scales = "free") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  ylab("Coverage in Target Sample")

Results by Method

For now, just using the setup where age is the fixed and random moderator, the training distributions are the same across trials, and the target distribution is the same as the training.

Meta-Analysis

#age, same target distribution
#mse
all_long %>%
  filter(Method %in% c("MA","MA_Inc"), covars_fix == "age",
         distribution == "same",
         grepl("mse", Metric) == T,
         sds != "1_1_0.5") %>%
  ggplot(aes(x=sds, y=Value)) +
  geom_boxplot(aes(color = Method)) +
  facet_grid(Dataset~Linear, scales = "free") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
  ylab("MSE")

#ggsave("Results/19Oct2023/MA_MSE_19Oct2023.jpeg")

#coverage
all_long %>%
  filter(Method %in% c("MA","MA_Inc"), covars_fix == "age",
         distribution == "same",
         grepl("coverage", Metric) == T,
         sds != "1_1_0.5") %>%
  ggplot(aes(x=sds, y=Value)) +
  geom_boxplot(aes(color = Method)) +
  geom_hline(aes(yintercept=.95), linetype = "dashed") +
  facet_grid(Dataset~Linear, scales = "free") +
  scale_y_continuous(labels = scales::percent) +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
  ylab("CI Coverage Percent") 

#ggsave("Results/19Oct2023/MA_Cov_19Oct2023.jpeg")

Causal Forest

#age, same target distribution
#mse
all_long %>%
  filter(Method %in% c("HCF","ACF","HCF_Rand","ACF_Rand"), covars_fix == "age",
         distribution == "same",
         grepl("mse", Metric) == T,
         sds != "1_1_0.5") %>%
  ggplot(aes(x=sds, y=Value)) +
  geom_boxplot(aes(color = Method)) +
  facet_grid(Dataset~Linear, scales = "free") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
  ylab("MSE")

#ggsave("Results/19Oct2023/CF_MSE_19Oct2023.jpeg")

#coverage
all_long %>%
  filter(Method %in% c("HCF","ACF","HCF_Rand","ACF_Rand"), covars_fix == "age",
         distribution == "same",
         grepl("coverage", Metric) == T,
         sds != "1_1_0.5") %>%
  ggplot(aes(x=sds, y=Value)) +
  geom_boxplot(aes(color = Method)) +
  geom_hline(aes(yintercept=.95), linetype = "dashed") +
  facet_grid(Dataset~Linear, scales = "free") +
  scale_y_continuous(labels = scales::percent) +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
  ylab("CI Coverage Percent") 

#ggsave("Results/19Oct2023/CF_Cov_19Oct2023.jpeg")

BART

#age, same target distribution
#mse
all_long %>%
  filter(Method %in% c("SBART","SBART_Rand"), covars_fix == "age",
         distribution == "same",
         grepl("mse", Metric) == T,
         sds != "1_1_0.5") %>%
  ggplot(aes(x=sds, y=Value)) +
  geom_boxplot(aes(color = Method)) +
  facet_grid(Dataset~Linear, scales = "free") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
  ylab("MSE")

#ggsave("Results/19Oct2023/BART_MSE_19Oct2023.jpeg")

#coverage
all_long %>%
  filter(Method %in% c("SBART","SBART_Rand"), covars_fix == "age",
         distribution == "same",
         grepl("coverage", Metric) == T,
         sds != "1_1_0.5") %>%
  ggplot(aes(x=sds, y=Value)) +
  geom_boxplot(aes(color = Method)) +
  geom_hline(aes(yintercept=.95), linetype = "dashed") +
  facet_grid(Dataset~Linear, scales = "free") +
  scale_y_continuous(labels = scales::percent) +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
  ylab("CI Coverage Percent") 

#ggsave("Results/19Oct2023/BART_Cov_19Oct2023.jpeg")

Old

Target CI Coverage

#age, same target distribution
all_long %>%
  filter(Metric == "target_coverage", covars_fix == "age",
         distribution == "same") %>%
  ggplot(aes(x=sds, y=Value)) +
  geom_boxplot(aes(color = Method)) +
  geom_hline(aes(yintercept=.95), linetype = "dashed") +
  facet_wrap(~lin) +
  scale_y_continuous(labels = scales::percent, limits = c(.75, 1)) +
  labs(color="Method") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
  ylab("CI Coverage Percent in Target Sample") +
  ggtitle("Same Target Distribution")
## Warning: Removed 819 rows containing non-finite values (stat_boxplot).

# points version
all_avg_long %>%
  filter(Metric == "target_coverage", covars_fix == "age",
         distribution == "same") %>%
  ggplot(aes(x=sds, y=Value)) +
  geom_point(aes(color = Method)) +
  geom_hline(aes(yintercept=.95), linetype = "dashed") +
  facet_wrap(~lin) +
  scale_y_continuous(labels = scales::percent, limits = c(.75, 1)) +
  labs(color="Method") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
  ylab("CI Coverage Percent in Target Sample") +
  ggtitle("Same Target Distribution")
## Warning: Removed 2 rows containing missing values (geom_point).

Training CI Coverage

#age, same target distribution
all_long %>%
  filter(Metric == "train_coverage", covars_fix == "age",
         distribution == "same") %>%
  ggplot(aes(x=sds, y=Value)) +
  geom_boxplot(aes(color = Method)) +
  geom_hline(aes(yintercept=.95), linetype = "dashed") +
  facet_wrap(~lin) +
  scale_y_continuous(labels = scales::percent, limits = c(.75, 1)) +
  labs(color="Method") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
  ylab("CI Coverage Percent in Training Sample") +
  ggtitle("Same Target Distribution")
## Warning: Removed 11450 rows containing non-finite values (stat_boxplot).

Target Bias

#age, same target distribution
all_long %>%
  filter(Metric == "target_bias", covars_fix == "age",
         distribution == "same") %>%
  ggplot(aes(x=sds, y=Value)) +
  geom_boxplot(aes(color = Method)) +
  facet_wrap(~lin) +
  labs(color="Method") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
  ylab("Mean Absolute Bias in Target Sample") +
  ggtitle("Same Target Distribution")

Training Bias

#age, same target distribution
all_long %>%
  filter(Metric == "train_bias", covars_fix == "age",
         distribution == "same") %>%
  ggplot(aes(x=sds, y=Value)) +
  geom_boxplot(aes(color = Method)) +
  facet_wrap(~lin) +
  labs(color="Method") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
  ylab("Mean Absolute Bias in Training Sample") +
  ggtitle("Same Target Distribution")

Target CI Length

#age, same target distribution
all_long %>%
  filter(Metric == "target_length", covars_fix == "age",
         distribution == "same") %>%
  ggplot(aes(x=sds, y=Value)) +
  geom_boxplot(aes(color = Method)) +
  facet_wrap(~lin) + ylim(0,10) +
  labs(color="Method") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
  ylab("CI Length in Target Sample") +
  ggtitle("Same Target Distribution")
## Warning: Removed 1067 rows containing non-finite values (stat_boxplot).

Target CI Coverage

#age, same target distribution
all_long %>%
  filter(Metric == "target_coverage", covars_fix == "age") %>%
  ggplot(aes(x=sds, y=Value)) +
  geom_boxplot(aes(color = Method)) +
  geom_hline(aes(yintercept=.95), linetype = "dashed") +
  facet_grid(lin~distribution) +
  scale_y_continuous(labels = scales::percent, limits = c(.75, 1)) +
  labs(color="Method") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
  ylab("CI Coverage Percent in Target Sample") +
  ggtitle("Same Target Distribution")
## Warning: Removed 1818 rows containing non-finite values (stat_boxplot).

all_long %>%
  filter(Metric == "target_coverage", covars_fix == "age, madrs") %>%
  ggplot(aes(x=sds, y=Value)) +
  geom_boxplot(aes(color = Method)) +
  geom_hline(aes(yintercept=.95), linetype = "dashed") +
  facet_grid(lin~distribution) +
  scale_y_continuous(labels = scales::percent, limits = c(.75, 1)) +
  labs(color="Method") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
  ylab("CI Coverage Percent in Target Sample") +
  ggtitle("Same Target Distribution, Age and MADRS")
## Warning: Removed 258 rows containing non-finite values (stat_boxplot).

Training CI Coverage

#age, same target distribution
all_long %>%
  filter(Metric == "train_coverage", covars_fix == "age") %>%
  ggplot(aes(x=sds, y=Value)) +
  geom_boxplot(aes(color = Method)) +
  geom_hline(aes(yintercept=.95), linetype = "dashed") +
  facet_grid(lin~distribution) +
  scale_y_continuous(labels = scales::percent, limits = c(.75, 1)) +
  labs(color="Method") +
  theme(axis.text.x = element_text(angle=90),
        text = element_text(size=12)) +
  xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
  ylab("CI Coverage Percent in Training Sample") +
  ggtitle("Same Target Distribution")
## Warning: Removed 33496 rows containing non-finite values (stat_boxplot).